Pilot data Parse bio 4 iPSC lines NPCs, 3 batches
Load libraries
library(Seurat)
The legacy packages maptools, rgdal, and rgeos, underpinning this package
will retire shortly. Please refer to R-spatial evolution reports on
https://r-spatial.org/r/2023/05/15/evolution4.html for details.
This package is now running under evolution status 0
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching SeuratObject
Warning message:
R graphics engine version 15 is not supported by this version of RStudio. The Plots tab will be disabled until a newer version of RStudio is installed.
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.2 ✔ purrr 1.0.1
✔ tibble 3.2.1 ✔ dplyr 1.1.2
✔ tidyr 1.3.0 ✔ stringr 1.5.0
✔ readr 2.1.3 ✔ forcats 0.5.2
── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
#library(CelltypeR)
seu
An object of class Seurat
58395 features across 23769 samples within 1 assay
Active assay: RNA (58395 features, 2500 variable features)
2 dimensional reductions calculated: pca, umap
Idents(seu) <- 'sample'
VlnPlot(seu, pt.size = 0.001, features = c("nFeature_RNA"))
VlnPlot(seu, pt.size = 0.001, features = "nCount_RNA")
VlnPlot(seu, pt.size = 0.001, features = "percent.mt")
Add in labels for batch and disease status and line
#library("CelltypeR")
# CelltypeR library is a library I (Rhalena) made for flow cytometry but uses the seurat object and I made a quick add annotations function.
Seurat_Parse12sample <- seu
# here is the function
annotate <- function(seu, annotations, to_label, annotation_name = "CellType"){
Idents(seu) <- to_label
names(annotations) <- levels(seu)
seu <- RenameIdents(seu, annotations)
seu <- AddMetaData(object=seu, metadata=Idents(seu), col.name = annotation_name)
}
Idents(Seurat_Parse12sample) <- "sample"
sample.levels <- levels(Seurat_Parse12sample)
# should give the order of sample
# test
Seurat_Parse12sample <- annotate(Seurat_Parse12sample, annotations = sample.levels, to_label = "sample",annotation_name = "sample.test")
table(Seurat_Parse12sample$sample.test)
x2965B3 x2965B1 TD07B3 x3448B1 x3448B2 TD22B3 TD07B2 x3448B3 x2965B2
1918 2353 1690 2400 2194 2894 2124 1530 1686
TD07B1 TD22B2 TD22B1
1496 2186 1298
table(Seurat_Parse12sample$sample)
TD07B1 TD07B2 TD07B3 TD22B1 TD22B2 TD22B3 x2965B1 x2965B2 x2965B3
1496 2124 1690 1298 2186 2894 2353 1686 1918
x3448B1 x3448B2 x3448B3
2400 2194 1530
# these match
#input vector we got from the seurat object
# Define regular expression to match first part of the string
pattern <- "^[A-Za-z]+"
# Use gsub() to replace the first part of the string with an empty string
sample.levels.new <- gsub(pattern, "", sample.levels)
# Extract B1, B2, B3 from new vector
batch <- gsub(".*B", "B", sample.levels.new)
Seurat_Parse12sample <- annotate(Seurat_Parse12sample, annotations = batch, to_label = "sample",annotation_name = "Batch")
table(Seurat_Parse12sample$Batch)
B3 B1 B2
8032 7547 8190
table(Seurat_Parse12sample$Batch,Seurat_Parse12sample$sample)
TD07B1 TD07B2 TD07B3 TD22B1 TD22B2 TD22B3 x2965B1 x2965B2 x2965B3
B3 0 0 1690 0 0 2894 0 0 1918
B1 1496 0 0 1298 0 0 2353 0 0
B2 0 2124 0 0 2186 0 0 1686 0
x3448B1 x3448B2 x3448B3
B3 0 0 1530
B1 2400 0 0
B2 0 2194 0
# add the cell line name
# sample vector is still the input vector
# Define regular expression to remove B1, B2, and B3
pattern <- "B[1-3]$"
# Use gsub() to remove B1, B2, and B3 from original vector
sample.levels.new <- gsub(pattern, "", sample.levels)
# Extract starting values from new vector
ipscline <- gsub("B[1-3]$", "", sample.levels.new)
ipscline
[1] "x2965" "x2965" "TD07" "x3448" "x3448" "TD22" "TD07" "x3448" "x2965"
[10] "TD07" "TD22" "TD22"
Seurat_Parse12sample <- annotate(Seurat_Parse12sample, annotations = ipscline, to_label = "sample",annotation_name = "IPSC_Line")
table(Seurat_Parse12sample$IPSC_Line)
x2965 TD07 x3448 TD22
5957 5310 6124 6378
table(Seurat_Parse12sample$IPSC_Line,Seurat_Parse12sample$sample)
TD07B1 TD07B2 TD07B3 TD22B1 TD22B2 TD22B3 x2965B1 x2965B2 x2965B3
x2965 0 0 0 0 0 0 2353 1686 1918
TD07 1496 2124 1690 0 0 0 0 0 0
x3448 0 0 0 0 0 0 0 0 0
TD22 0 0 0 1298 2186 2894 0 0 0
x3448B1 x3448B2 x3448B3
x2965 0 0 0
TD07 0 0 0
x3448 2400 2194 1530
TD22 0 0 0
# add disease status
# we need to know the order of the lines
Idents(Seurat_Parse12sample) <- "IPSC_Line"
line.levels <- levels(Seurat_Parse12sample)
line.levels
[1] "x2965" "TD07" "x3448" "TD22"
PDstatus <- c("PD","PD","Con","Con") # if TD07 and 2965 are PD lines and TD22 and 3448 are control lines
Seurat_Parse12sample <- annotate(Seurat_Parse12sample, annotations = PDstatus, to_label = "IPSC_Line",annotation_name = "DiseaseStatus")
table(Seurat_Parse12sample$DiseaseStatus)
PD Con
11267 12502
table(Seurat_Parse12sample$DiseaseStatus,Seurat_Parse12sample$IPSC_Line)
x2965 TD07 x3448 TD22
PD 5957 5310 0 0
Con 0 0 6124 6378
table(Seurat_Parse12sample$Batch,Seurat_Parse12sample$IPSC_Line)
x2965 TD07 x3448 TD22
B3 1918 1690 1530 2894
B1 2353 1496 2400 1298
B2 1686 2124 2194 2186
Save info
saveRDS(Seurat_Parse12sample, "Parse12sample4lines3batchJuly7.RDS")
Align the cell lines and batches, we will align across the 12 samples
# make a list of seurat objects by our cell type variable
sublist <- SplitObject(seu, split.by = "sample")
# normalize and find variable features
for (i in 1:length(sublist)){
sublist[[i]] <- NormalizeData(sublist[[i]], verbose = FALSE)
sublist[[i]] <- FindVariableFeatures(sublist[[i]], selection.method = "vst")
}
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Create an empty Seurat object to store the integrated data
# Take the first Seurat object from the list as the starting point
integrated_seurat <- subset(sublist[[1]])
# Iterate over the list of Seurat objects
for (i in 1:length(sublist)) {
# Rename the 'orig.ident' metadata inside the seurat object to match the object name in the list
sublist[[i]]$orig.ident <- names(sublist)[i]
}
sample.list <- sublist
for (i in 1:length(sample.list)) {
# Normalize and scale the data
sample.list[[i]] <- NormalizeData(sample.list[[i]], verbose = FALSE)
sample.list[[i]] <- ScaleData(sample.list[[i]], verbose = FALSE)
# Find variable features
sample.list[[i]] <- FindVariableFeatures(sample.list[[i]], selection.method = "vst")
# Get the variable features
variable_features <- VariableFeatures(sample.list[[i]])
# Run PCA with the variable features
sample.list[[i]] <- RunPCA(sample.list[[i]], verbose = FALSE, npcs = 30, features = variable_features)
}
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
int.anchors <- FindIntegrationAnchors(object.list = sample.list, dims = 1:30, reduction = "rpca")
Computing 2000 integration features
Scaling features for provided objects
| | 0 % ~calculating
|+++++ | 8 % ~05s
|+++++++++ | 17% ~04s
|+++++++++++++ | 25% ~03s
|+++++++++++++++++ | 33% ~03s
|+++++++++++++++++++++ | 42% ~02s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++++++ | 58% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
Computing within dataset neighborhoods
| | 0 % ~calculating
|+++++ | 8 % ~07s
|+++++++++ | 17% ~06s
|+++++++++++++ | 25% ~05s
|+++++++++++++++++ | 33% ~04s
|+++++++++++++++++++++ | 42% ~04s
|+++++++++++++++++++++++++ | 50% ~03s
|++++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Finding all pairwise anchors
| | 0 % ~calculating
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 2868 anchors
|+ | 2 % ~03m 20s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 545 anchors
|++ | 3 % ~03m 01s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 533 anchors
|+++ | 5 % ~02m 54s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 558 anchors
|++++ | 6 % ~02m 53s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 549 anchors
|++++ | 8 % ~02m 56s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 424 anchors
|+++++ | 9 % ~02m 50s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 467 anchors
|++++++ | 11% ~02m 46s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 427 anchors
|+++++++ | 12% ~02m 45s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 340 anchors
|+++++++ | 14% ~02m 41s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 573 anchors
|++++++++ | 15% ~02m 38s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 915 anchors
|+++++++++ | 17% ~02m 38s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 912 anchors
|++++++++++ | 18% ~02m 37s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 519 anchors
|++++++++++ | 20% ~02m 34s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 766 anchors
|+++++++++++ | 21% ~02m 33s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 738 anchors
|++++++++++++ | 23% ~02m 31s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 531 anchors
|+++++++++++++ | 24% ~02m 27s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 505 anchors
|+++++++++++++ | 26% ~02m 24s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 464 anchors
|++++++++++++++ | 27% ~02m 20s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 518 anchors
|+++++++++++++++ | 29% ~02m 17s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 782 anchors
|++++++++++++++++ | 30% ~02m 14s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 892 anchors
|++++++++++++++++ | 32% ~02m 12s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 549 anchors
|+++++++++++++++++ | 33% ~02m 09s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 493 anchors
|++++++++++++++++++ | 35% ~02m 05s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 432 anchors
|+++++++++++++++++++ | 36% ~02m 01s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 2894 anchors
|+++++++++++++++++++ | 38% ~01m 58s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 569 anchors
|++++++++++++++++++++ | 39% ~01m 55s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 713 anchors
|+++++++++++++++++++++ | 41% ~01m 52s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 494 anchors
|++++++++++++++++++++++ | 42% ~01m 49s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 798 anchors
|++++++++++++++++++++++ | 44% ~01m 46s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 724 anchors
|+++++++++++++++++++++++ | 45% ~01m 43s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 398 anchors
|++++++++++++++++++++++++ | 47% ~01m 39s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 456 anchors
|+++++++++++++++++++++++++ | 48% ~01m 37s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 415 anchors
|+++++++++++++++++++++++++ | 50% ~01m 34s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 931 anchors
|++++++++++++++++++++++++++ | 52% ~01m 31s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 570 anchors
|+++++++++++++++++++++++++++ | 53% ~01m 28s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 419 anchors
|++++++++++++++++++++++++++++ | 55% ~01m 25s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 553 anchors
|+++++++++++++++++++++++++++++ | 56% ~01m 22s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 573 anchors
|+++++++++++++++++++++++++++++ | 58% ~01m 19s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 2037 anchors
|++++++++++++++++++++++++++++++ | 59% ~01m 16s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 409 anchors
|+++++++++++++++++++++++++++++++ | 61% ~01m 13s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 413 anchors
|++++++++++++++++++++++++++++++++ | 62% ~01m 10s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 566 anchors
|++++++++++++++++++++++++++++++++ | 64% ~01m 07s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 472 anchors
|+++++++++++++++++++++++++++++++++ | 65% ~01m 04s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 401 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~01m 01s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 365 anchors
|+++++++++++++++++++++++++++++++++++ | 68% ~58s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 847 anchors
|+++++++++++++++++++++++++++++++++++ | 70% ~56s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 835 anchors
|++++++++++++++++++++++++++++++++++++ | 71% ~53s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 475 anchors
|+++++++++++++++++++++++++++++++++++++ | 73% ~50s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 772 anchors
|++++++++++++++++++++++++++++++++++++++ | 74% ~47s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 740 anchors
|++++++++++++++++++++++++++++++++++++++ | 76% ~45s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 3274 anchors
|+++++++++++++++++++++++++++++++++++++++ | 77% ~42s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 796 anchors
|++++++++++++++++++++++++++++++++++++++++ | 79% ~39s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 717 anchors
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~37s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 811 anchors
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~34s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 529 anchors
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~31s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 1402 anchors
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~28s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 1446 anchors
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~25s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 530 anchors
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~22s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 582 anchors
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~19s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 443 anchors
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~17s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 1047 anchors
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~14s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 405 anchors
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~11s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 483 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~08s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 595 anchors
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~06s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 570 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 98% ~03s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 1021 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03m 01s
integrated_seurat <- IntegrateData(anchorset = int.anchors, dims = 1:30)
Merging dataset 8 into 4
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 11 into 6
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 1 into 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 10 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 12 into 2 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 9 into 6 11
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 2 1 12 into 6 11 9
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 7 into 5
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 3 10 into 4 8
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 4 8 3 10 into 6 11 9 2 1 12
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 5 7 into 6 11 9 2 1 12 4 8 3 10
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
#
# must set the k weight to the lowest cell count
# in the parse sample we have over 1530 cells in the smallest count so we don't have to change the k from the 100 default
Now we need to run the workflow on the integrated object
DefaultAssay(integrated_seurat) <- "integrated"
integrated_seurat <- ScaleData(integrated_seurat, verbose = FALSE)
# only the integrated features will be the pca input
integrated_seurat <- RunPCA(integrated_seurat, npcs = 20, verbose = FALSE)
integrated_seurat <- RunUMAP(integrated_seurat, reduction = "pca", dims = 1:20, n.neighbors = 81)
Have a look at the new UMAP
DimPlot(integrated_seurat, group.by = 'sample')
DimPlot(integrated_seurat, group.by = 'Batch')
DimPlot(integrated_seurat, group.by = 'DiseaseStatus')
DimPlot(integrated_seurat, group.by = 'IPSC_Line')
NA
NA
NA
# saveRDS(integrated_seurat, "Integrated12samples.RDS")
# setwd("~/Documents/Data/scRNAseq/ParseExample/Experiment1-mini12")
intergrated_seurat <- readRDS("/Users/rhalenathomas/Documents/Data/scRNAseq/ParseExample/Experiment1-mini12/Integrated12samples.RDS")
Find new clusters
integrated_seurat <- FindClusters(integrated_seurat, resolution = c(0,0.3,0.6,1) )
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 23769
Number of edges: 3304295
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 1.0000
Number of communities: 1
Elapsed time: 16 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 23769
Number of edges: 3304295
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9097
Number of communities: 10
Elapsed time: 20 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 23769
Number of edges: 3304295
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8752
Number of communities: 16
Elapsed time: 23 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 23769
Number of edges: 3304295
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8382
Number of communities: 20
Elapsed time: 20 seconds
integrated_seurat <- intergrated_seurat
DimPlot(integrated_seurat, group.by = "integrated_snn_res.0.3")
DimPlot(integrated_seurat, group.by = "integrated_snn_res.0.6")
DimPlot(integrated_seurat, group.by = "integrated_snn_res.0.3", split.by = "DiseaseStatus")
table(integrated_seurat$DiseaseStatus)
Con PD
12502 11267
Annotate clusters res 0.3
write(ClusterMarkers, "clusterMarkersres03Integrated.csv")
Error in cat(x, file = file, sep = c(rep.int(sep, ncolumns - 1), "\n"), :
argument 1 (type 'list') cannot be handled by 'cat'
library(enrichR)
Welcome to enrichR
Checking connection ...
Enrichr ... Connection is Live!
FlyEnrichr ... Connection is available!
WormEnrichr ... Connection is available!
YeastEnrichr ... Connection is available!
FishEnrichr ... Connection is available!
OxEnrichr ... Connection is available!
setEnrichrSite("Enrichr") # Human genes
Connection changed to https://maayanlab.cloud/Enrichr/
Connection is Live!
# list of all the databases
# get the possible libraries
dbs <- listEnrichrDbs()
# this will list the possible libraries
dbs
# select libraries with cell types
db <- c('CellMarker_Augmented_2021','Azimuth_Cell_Types_2021')
# function for a quick look
checkCelltypes <- function(cluster_num = 0){
clusterX <- ClusterMarkers %>% filter(cluster == cluster_num & avg_log2FC > 0.25)
genes <- clusterX$gene
# the cell type libraries
# get the results for each library
clusterX.cell <- enrichr(genes, databases = db)
# visualize the results
print(plotEnrich(clusterX.cell[[1]], showTerms = 20, numChar = 40, y = "Count", orderBy = "P.value", title = 'CellMarker_Augmented_2021'))
print(plotEnrich(clusterX.cell[[2]], showTerms = 20, numChar = 40, y = "Count", orderBy = "P.value", title = 'Azimuth_Cell_Types_2021'))
}
#heatmap of top markers
top3 <- ClusterMarkers %>% group_by(cluster) %>% top_n(n=3, wt =avg_log2FC)
DoHeatmap(integrated_seurat, features = top3$gene, size = 3, angle = 90, group.by = "integrated_snn_res.0.3")
NA
NA
table(ClusterMarkers$cluster)
0 1 2 3 4 5 6 7 8 9
130 38 242 477 134 249 275 417 377 233
Check each cluster quickly
checkCelltypes(cluster_num = 3)
Uploading data to Enrichr... Done.
Querying CellMarker_Augmented_2021... Done.
Querying Azimuth_Cell_Types_2021... Done.
Parsing results... Done.
Look at some expression lists
da_neurons <- c("TH","SLC6A3","SLC18A2","SOX6","NDNF","SNCG","ALDH1A1","CALB1","TACR2","SLC17A6","SLC32A1","OTX2","GRP","LPL","CCK","VIP")
NPC_orStemLike <- c("DCX","NEUROD1","TBR1","PCNA","MKI67","SOX2","NES","PAX6","MASH1")
mature_neurons = c("RBFOX3","SYP","DLG45","VAMP1","VAMP2","TUBB3","SYT1","BSN","HOMER1","SLC17A6")
excitatory_neurons = c("GRIA2","GRIA1","GRIA4","GRIN1","GRIN2B","GRIN2A","GRIN3A","GRIN3","GRIP1","CAMK2A")
inhbitory_neurons = inh = c("GAD1","GAD2", "GAT1","PVALB","GABR2","GABR1","GBRR1","GABRB2","GABRB1","GABRB3","GABRA6","GABRA1","GABRA4","TRAK2")
astrocytes <- c("GFAP","S100B","AQP4","APOE", "SOX9","SLC1A3")
oligodendrocytes <- c("MBP","MOG","OLIG1","OLIG2","SOX10")
opc <-
radial_glia <- c("PTPRC","AIF1","ADGRE1", "VIM", "TNC","PTPRZ1","FAM107A","HOPX","LIFR",
"ITGB5","IL6ST","SLC1A3")
epithelial <- c("HES1","HES5","SOX2","SOX10","NES","CDH1","NOTCH1")
microglia <- c("IBA1","P2RY12","P2RY13","TREM119", "GPR34","SIGLECH","TREM2",
"CX3CR1","FCRLS","OLFML3","HEXB","TGFBR1", "SALL1","MERTK",
"PROS1")
features_list <- c("MKI67","SOX2","POU5F1","DLX2","PAX6","SOX9","HES1","NES","RBFOX3","MAP2","NCAM1","CD24","GRIA2","GRIN2B","GABBR1","GAD1","GAD2","GABRA1","GABRB2","TH","ALDH1A1","LMX1B","NR4A2","CORIN","CALB1","KCNJ6","CXCR4","ITGA6","SLC1A3","CD44","AQP4","S100B", "PDGFRA","OLIG2","MBP","CLDN11","VIM","VCAM1")
short_list <- c("MKI67","SOX9","HES1","NES","DLX2","RBFOX3","MAP2","TH","CALB1","KCNJ6","SLC1A3","CD44","AQP4","S100B","OLIG2","MBP","VIM")
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in da_neurons) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find TH in the default search locations, found in RNA assay instead
Warning: Could not find SLC6A3 in the default search locations, found in RNA assay instead
Warning: Could not find SLC18A2 in the default search locations, found in RNA assay instead
Warning: Could not find SNCG in the default search locations, found in RNA assay instead
Warning: Could not find ALDH1A1 in the default search locations, found in RNA assay instead
Warning: Could not find TACR2 in the default search locations, found in RNA assay instead
Warning: Could not find SLC32A1 in the default search locations, found in RNA assay instead
Warning: Could not find OTX2 in the default search locations, found in RNA assay instead
Warning: Could not find GRP in the default search locations, found in RNA assay instead
Warning: Could not find CCK in the default search locations, found in RNA assay instead
Warning: Could not find VIP in the default search locations, found in RNA assay instead
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in NPC_orStemLike) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find PCNA in the default search locations, found in RNA assay instead
Warning: Could not find SOX2 in the default search locations, found in RNA assay instead
Warning: Could not find NES in the default search locations, found in RNA assay instead
Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
The following requested variables were not found: MASH1
Error: None of the requested features were found: MASH1 in slot data
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in astrocytes) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find GFAP in the default search locations, found in RNA assay instead
Warning: Could not find S100B in the default search locations, found in RNA assay instead
Warning: Could not find AQP4 in the default search locations, found in RNA assay instead
Warning: Could not find APOE in the default search locations, found in RNA assay instead
Warning: Could not find SOX9 in the default search locations, found in RNA assay instead
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in radial_glia) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find PTPRC in the default search locations, found in RNA assay instead
Warning: Could not find AIF1 in the default search locations, found in RNA assay instead
Warning: Could not find ADGRE1 in the default search locations, found in RNA assay instead
Warning: Could not find VIM in the default search locations, found in RNA assay instead
Warning: Could not find PTPRZ1 in the default search locations, found in RNA assay instead
Warning: Could not find FAM107A in the default search locations, found in RNA assay instead
Warning: Could not find HOPX in the default search locations, found in RNA assay instead
Warning: Could not find ITGB5 in the default search locations, found in RNA assay instead
Warning: Could not find IL6ST in the default search locations, found in RNA assay instead
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in mature_neurons) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find SYP in the default search locations, found in RNA assay instead
Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
The following requested variables were not found: DLG45
Error: None of the requested features were found: DLG45 in slot data
Idents(integrated_seurat) <- "integrated_snn_res.0.3"
for (i in excitatory_neurons) {
print(FeaturePlot(integrated_seurat, features = i, min.cutoff = 'q1', max.cutoff = 'q97', label = TRUE))
}
Warning: Could not find GRIN1 in the default search locations, found in RNA assay instead
Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features), :
The following requested variables were not found: GRIN3
Error: None of the requested features were found: GRIN3 in slot data
Add annotations - first pass NPC-stem NPC-glia NPC-SOX6 Neurons-Glut Progenitors-div NPC-SOX2-OXT-fibro Neural-Stem stem cell Neuron-GABA Neuron-epithelial
celltypes2 <- c("NPC","NPC","NPC","Neurons","NPC",
"NPC","NPC","Stem","Neurons","Epithelial")
integrated_seurat <- annotate(integrated_seurat, annotations = celltypes2, to_label = "integrated_snn_res.0.3",annotation_name = "Celltypes2")
DimPlot(integrated_seurat, label = TRUE)
DimPlot(integrated_seurat, split.by = "DiseaseStatus")
DimPlot(integrated_seurat, split.by = "DiseaseStatus", group.by = "Celltypes1")
NA
NA
celltypes3 <- c("NPC","NPC","NPC","Neurons","NPC-div",
"Neuro-NPC","Neural-Stem","Stem","Neurons","Neural-epi")
integrated_seurat <- annotate(integrated_seurat, annotations = celltypes3, to_label = "integrated_snn_res.0.3",annotation_name = "Celltypes3")
DimPlot(integrated_seurat, label = TRUE)
table(integrated_seurat$IPSC_Line)
TD07 TD22 x2965 x3448
5310 6378 5957 6124
DEG in cell types 3 groups
DGE <- FindMarkers(seu_sub, ident.1 = "PD", ident.2 = "Con")
| | 0 % ~calculating
|+ | 1 % ~12s
|++ | 2 % ~13s
|++ | 3 % ~13s
|+++ | 4 % ~13s
|+++ | 5 % ~13s
|++++ | 6 % ~13s
|++++ | 7 % ~13s
|+++++ | 9 % ~12s
|+++++ | 10% ~12s
|++++++ | 11% ~12s
|++++++ | 12% ~12s
|+++++++ | 13% ~12s
|+++++++ | 14% ~12s
|++++++++ | 15% ~12s
|++++++++ | 16% ~12s
|+++++++++ | 17% ~12s
|++++++++++ | 18% ~11s
|++++++++++ | 19% ~11s
|+++++++++++ | 20% ~11s
|+++++++++++ | 21% ~11s
|++++++++++++ | 22% ~11s
|++++++++++++ | 23% ~11s
|+++++++++++++ | 24% ~10s
|+++++++++++++ | 26% ~10s
|++++++++++++++ | 27% ~10s
|++++++++++++++ | 28% ~10s
|+++++++++++++++ | 29% ~10s
|+++++++++++++++ | 30% ~10s
|++++++++++++++++ | 31% ~10s
|++++++++++++++++ | 32% ~09s
|+++++++++++++++++ | 33% ~09s
|++++++++++++++++++ | 34% ~09s
|++++++++++++++++++ | 35% ~09s
|+++++++++++++++++++ | 36% ~09s
|+++++++++++++++++++ | 37% ~09s
|++++++++++++++++++++ | 38% ~09s
|++++++++++++++++++++ | 39% ~08s
|+++++++++++++++++++++ | 40% ~08s
|+++++++++++++++++++++ | 41% ~08s
|++++++++++++++++++++++ | 43% ~08s
|++++++++++++++++++++++ | 44% ~08s
|+++++++++++++++++++++++ | 45% ~08s
|+++++++++++++++++++++++ | 46% ~07s
|++++++++++++++++++++++++ | 47% ~07s
|++++++++++++++++++++++++ | 48% ~07s
|+++++++++++++++++++++++++ | 49% ~07s
|+++++++++++++++++++++++++ | 50% ~07s
|++++++++++++++++++++++++++ | 51% ~07s
|+++++++++++++++++++++++++++ | 52% ~07s
|+++++++++++++++++++++++++++ | 53% ~06s
|++++++++++++++++++++++++++++ | 54% ~06s
|++++++++++++++++++++++++++++ | 55% ~06s
|+++++++++++++++++++++++++++++ | 56% ~06s
|+++++++++++++++++++++++++++++ | 57% ~06s
|++++++++++++++++++++++++++++++ | 59% ~06s
|++++++++++++++++++++++++++++++ | 60% ~05s
|+++++++++++++++++++++++++++++++ | 61% ~05s
|+++++++++++++++++++++++++++++++ | 62% ~05s
|++++++++++++++++++++++++++++++++ | 63% ~05s
|++++++++++++++++++++++++++++++++ | 64% ~05s
|+++++++++++++++++++++++++++++++++ | 65% ~05s
|+++++++++++++++++++++++++++++++++ | 66% ~05s
|++++++++++++++++++++++++++++++++++ | 67% ~04s
|+++++++++++++++++++++++++++++++++++ | 68% ~04s
|+++++++++++++++++++++++++++++++++++ | 69% ~04s
|++++++++++++++++++++++++++++++++++++ | 70% ~04s
|++++++++++++++++++++++++++++++++++++ | 71% ~04s
|+++++++++++++++++++++++++++++++++++++ | 72% ~04s
|+++++++++++++++++++++++++++++++++++++ | 73% ~04s
|++++++++++++++++++++++++++++++++++++++ | 74% ~03s
|++++++++++++++++++++++++++++++++++++++ | 76% ~03s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~03s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~03s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~03s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~03s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~02s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~02s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~02s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=13s
DimPlot(seu_sub, group.by = "DiseaseStatus")
table(seu_sub$DiseaseStatus)
Con PD
1746 1647
Neurons DGE volcano plot
library(EnhancedVolcano)
EnhancedVolcano(DGE,
lab = rownames(DGE),
#xlim = c(-0.25,0.25),
x = 'avg_log2FC',
y = 'p_val_adj',
pCutoff = 0.000001,
FCcutoff = 1,
pointSize = 3.0,
labSize = 6.0)
NA
NA
NA
DGE in NPCs: NPC-div
seu_sub <- subset(integrated_seurat, idents = "NPC-div")
dim(seu_sub)
[1] 58395 2936
table(seu_sub$IPSC_Line)
TD07 TD22 x2965 x3448
753 1106 583 494
table(seu_sub$DiseaseStatus)
Con PD
1600 1336
seu_sub <- ScaleData(seu_sub)
Centering and scaling data matrix
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 7%
|
|===== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========= | 14%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 19%
|
|============= | 20%
|
|============== | 22%
|
|=============== | 24%
|
|================ | 25%
|
|================= | 27%
|
|================== | 29%
|
|==================== | 31%
|
|===================== | 32%
|
|====================== | 34%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 39%
|
|========================== | 41%
|
|=========================== | 42%
|
|============================ | 44%
|
|============================= | 46%
|
|============================== | 47%
|
|=============================== | 49%
|
|================================= | 51%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 56%
|
|===================================== | 58%
|
|====================================== | 59%
|
|======================================= | 61%
|
|======================================== | 63%
|
|========================================= | 64%
|
|========================================== | 66%
|
|=========================================== | 68%
|
|============================================ | 69%
|
|============================================== | 71%
|
|=============================================== | 73%
|
|================================================ | 75%
|
|================================================= | 76%
|
|================================================== | 78%
|
|=================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 83%
|
|====================================================== | 85%
|
|======================================================= | 86%
|
|======================================================== | 88%
|
|========================================================= | 90%
|
|=========================================================== | 92%
|
|============================================================ | 93%
|
|============================================================= | 95%
|
|============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================| 100%
seu_sub <- NormalizeData(seu_sub)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Idents(seu_sub) <- "DiseaseStatus"
levels(seu_sub)
[1] "PD" "Con"
DGE <- FindMarkers(seu_sub, ident.1 = "PD", ident.2 = "Con")
| | 0 % ~calculating
|+ | 1 % ~14s
|++ | 2 % ~12s
|++ | 3 % ~11s
|+++ | 4 % ~11s
|+++ | 5 % ~10s
|++++ | 6 % ~10s
|++++ | 7 % ~10s
|+++++ | 8 % ~09s
|+++++ | 9 % ~09s
|++++++ | 10% ~09s
|++++++ | 11% ~09s
|+++++++ | 12% ~09s
|+++++++ | 13% ~08s
|++++++++ | 14% ~08s
|++++++++ | 15% ~08s
|+++++++++ | 16% ~08s
|+++++++++ | 17% ~08s
|++++++++++ | 18% ~08s
|++++++++++ | 19% ~08s
|+++++++++++ | 20% ~07s
|+++++++++++ | 21% ~07s
|++++++++++++ | 22% ~07s
|++++++++++++ | 23% ~07s
|+++++++++++++ | 24% ~07s
|+++++++++++++ | 25% ~07s
|++++++++++++++ | 26% ~07s
|++++++++++++++ | 27% ~07s
|+++++++++++++++ | 28% ~07s
|+++++++++++++++ | 29% ~07s
|++++++++++++++++ | 30% ~06s
|++++++++++++++++ | 31% ~06s
|+++++++++++++++++ | 32% ~06s
|+++++++++++++++++ | 33% ~06s
|++++++++++++++++++ | 34% ~06s
|++++++++++++++++++ | 35% ~06s
|+++++++++++++++++++ | 36% ~06s
|+++++++++++++++++++ | 37% ~06s
|++++++++++++++++++++ | 38% ~06s
|++++++++++++++++++++ | 39% ~06s
|+++++++++++++++++++++ | 40% ~05s
|+++++++++++++++++++++ | 41% ~05s
|++++++++++++++++++++++ | 42% ~05s
|++++++++++++++++++++++ | 43% ~05s
|+++++++++++++++++++++++ | 44% ~05s
|+++++++++++++++++++++++ | 45% ~05s
|++++++++++++++++++++++++ | 46% ~05s
|++++++++++++++++++++++++ | 47% ~05s
|+++++++++++++++++++++++++ | 48% ~05s
|+++++++++++++++++++++++++ | 49% ~05s
|++++++++++++++++++++++++++ | 51% ~05s
|++++++++++++++++++++++++++ | 52% ~04s
|+++++++++++++++++++++++++++ | 53% ~04s
|+++++++++++++++++++++++++++ | 54% ~04s
|++++++++++++++++++++++++++++ | 55% ~04s
|++++++++++++++++++++++++++++ | 56% ~04s
|+++++++++++++++++++++++++++++ | 57% ~04s
|+++++++++++++++++++++++++++++ | 58% ~04s
|++++++++++++++++++++++++++++++ | 59% ~04s
|++++++++++++++++++++++++++++++ | 60% ~04s
|+++++++++++++++++++++++++++++++ | 61% ~04s
|+++++++++++++++++++++++++++++++ | 62% ~04s
|++++++++++++++++++++++++++++++++ | 63% ~03s
|++++++++++++++++++++++++++++++++ | 64% ~03s
|+++++++++++++++++++++++++++++++++ | 65% ~03s
|+++++++++++++++++++++++++++++++++ | 66% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|++++++++++++++++++++++++++++++++++ | 68% ~03s
|+++++++++++++++++++++++++++++++++++ | 69% ~03s
|+++++++++++++++++++++++++++++++++++ | 70% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~03s
|++++++++++++++++++++++++++++++++++++ | 72% ~03s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~02s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
DimPlot(seu_sub, group.by = "DiseaseStatus")
EnhancedVolcano(DGE,
lab = rownames(DGE),
#xlim = c(-0.25,0.25),
x = 'avg_log2FC',
y = 'p_val_adj',
pCutoff = 0.000001,
FCcutoff = 1,
pointSize = 3.0,
labSize = 6.0)
ft <- c("TEME132C","TMEM132D","GPC3","NRG1","PDE1A",
"TTC6","SPON1","NTN1","PK1B","BNC2")
DotPlot(seu_sub, features = ft) +RotatedAxis()
Warning in FetchData.Seurat(object = object, vars = features, cells = cells) :
The following requested variables were not found: TEME132C, PK1B
Warning: Scaling data with a low number of groups may produce misleading results
#head(DGE)
# filter for significant pvalues
DGE.ft <- DGE %>% filter(p_val_adj <= 0.01)
dim(DGE.ft)
[1] 544 5
up <- DGE.ft %>% filter(avg_log2FC > 0.1)
dim(up)
[1] 313 5
down <- DGE.ft %>% filter(avg_log2FC < -0.1)
dim(down)
[1] 231 5
Idents(seu_sub) <- "DiseaseStatus"
levels(seu_sub)
[1] "PD" "Con"
DGE <- FindMarkers(seu_sub, ident.1 = "PD", ident.2 = "Con")
| | 0 % ~calculating
|+ | 1 % ~01m 04s
|++ | 2 % ~57s
|++ | 3 % ~55s
|+++ | 4 % ~54s
|+++ | 5 % ~52s
|++++ | 6 % ~51s
|++++ | 7 % ~50s
|+++++ | 8 % ~49s
|+++++ | 9 % ~49s
|++++++ | 10% ~48s
|++++++ | 11% ~47s
|+++++++ | 12% ~46s
|+++++++ | 13% ~46s
|++++++++ | 14% ~45s
|++++++++ | 15% ~44s
|+++++++++ | 16% ~43s
|+++++++++ | 17% ~43s
|++++++++++ | 18% ~42s
|++++++++++ | 19% ~42s
|+++++++++++ | 20% ~41s
|+++++++++++ | 21% ~40s
|++++++++++++ | 22% ~40s
|++++++++++++ | 23% ~40s
|+++++++++++++ | 24% ~39s
|+++++++++++++ | 26% ~39s
|++++++++++++++ | 27% ~38s
|++++++++++++++ | 28% ~37s
|+++++++++++++++ | 29% ~37s
|+++++++++++++++ | 30% ~38s
|++++++++++++++++ | 31% ~37s
|++++++++++++++++ | 32% ~36s
|+++++++++++++++++ | 33% ~36s
|+++++++++++++++++ | 34% ~35s
|++++++++++++++++++ | 35% ~35s
|++++++++++++++++++ | 36% ~34s
|+++++++++++++++++++ | 37% ~33s
|+++++++++++++++++++ | 38% ~33s
|++++++++++++++++++++ | 39% ~32s
|++++++++++++++++++++ | 40% ~32s
|+++++++++++++++++++++ | 41% ~31s
|+++++++++++++++++++++ | 42% ~31s
|++++++++++++++++++++++ | 43% ~30s
|++++++++++++++++++++++ | 44% ~29s
|+++++++++++++++++++++++ | 45% ~29s
|+++++++++++++++++++++++ | 46% ~28s
|++++++++++++++++++++++++ | 47% ~28s
|++++++++++++++++++++++++ | 48% ~27s
|+++++++++++++++++++++++++ | 49% ~27s
|+++++++++++++++++++++++++ | 50% ~26s
|++++++++++++++++++++++++++ | 51% ~26s
|+++++++++++++++++++++++++++ | 52% ~25s
|+++++++++++++++++++++++++++ | 53% ~24s
|++++++++++++++++++++++++++++ | 54% ~24s
|++++++++++++++++++++++++++++ | 55% ~23s
|+++++++++++++++++++++++++++++ | 56% ~23s
|+++++++++++++++++++++++++++++ | 57% ~22s
|++++++++++++++++++++++++++++++ | 58% ~22s
|++++++++++++++++++++++++++++++ | 59% ~21s
|+++++++++++++++++++++++++++++++ | 60% ~21s
|+++++++++++++++++++++++++++++++ | 61% ~20s
|++++++++++++++++++++++++++++++++ | 62% ~19s
|++++++++++++++++++++++++++++++++ | 63% ~19s
|+++++++++++++++++++++++++++++++++ | 64% ~18s
|+++++++++++++++++++++++++++++++++ | 65% ~18s
|++++++++++++++++++++++++++++++++++ | 66% ~17s
|++++++++++++++++++++++++++++++++++ | 67% ~17s
|+++++++++++++++++++++++++++++++++++ | 68% ~16s
|+++++++++++++++++++++++++++++++++++ | 69% ~16s
|++++++++++++++++++++++++++++++++++++ | 70% ~15s
|++++++++++++++++++++++++++++++++++++ | 71% ~15s
|+++++++++++++++++++++++++++++++++++++ | 72% ~14s
|+++++++++++++++++++++++++++++++++++++ | 73% ~14s
|++++++++++++++++++++++++++++++++++++++ | 74% ~13s
|++++++++++++++++++++++++++++++++++++++ | 76% ~13s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~12s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~11s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~11s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~10s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~10s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~09s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~09s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~08s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~08s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~07s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~07s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~06s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~06s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~05s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~05s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~04s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~04s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=50s
DimPlot(seu_sub, group.by = "DiseaseStatus")
library(EnhancedVolcano)
EnhancedVolcano(DGE,
lab = rownames(DGE),
xlim = c(-3,3),
x = 'avg_log2FC',
y = 'p_val_adj',
pCutoff = 0.01,
FCcutoff = 1,
pointSize = 3.0,
labSize = 6.0)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
ft <- c("RBFOX1","TMEM132C","GPC3","NRG1","LINGO2",
"TTC6","SLIT1","CHN2","NTN1","ALK")
DotPlot(seu_sub, features = ft) +RotatedAxis()
Warning: Scaling data with a low number of groups may produce misleading results
#head(DGE)
# filter for significant pvalues
DGE.ft <- DGE %>% filter(p_val_adj <= 0.01)
dim(DGE.ft)
[1] 777 5
up <- DGE.ft %>% filter(avg_log2FC > 0.1)
dim(up)
[1] 383 5
down <- DGE.ft %>% filter(avg_log2FC < -0.1)
dim(down)
[1] 394 5
From the pseudo bulk
ft <- c("CLRN1","NR2E1","IFI44","DMRT3","FEZF2",
"KCNJ16","RGPD2","CORIN","GALR1","SIM1")
DotPlot(seu_sub, features = ft) +RotatedAxis()
Warning: Scaling data with a low number of groups may produce misleading results
DGEup <- getGSA(DGE.ft, up_or_down = "up",
LFCthresh = 0.02,
pval_thresh = 0.05)
Uploading data to Enrichr... Done.
Querying KEGG_2019_Human... Done.
Querying GWAS_Catalog_2019... Done.
Querying GO_Biological_Process_2023... Done.
Querying GO_Cellular_Component_2023... Done.
Querying GO_Molecular_Function_2023... Done.
Parsing results... Done.
DGEdown <- getGSA(DGE.ft, up_or_down = "down",
LFCthresh = -0.02,
pval_thresh = 0.05)
Uploading data to Enrichr... Done.
Querying KEGG_2019_Human... Done.
Querying GWAS_Catalog_2019... Done.
Querying GO_Biological_Process_2023... Done.
Querying GO_Cellular_Component_2023... Done.
Querying GO_Molecular_Function_2023... Done.
Parsing results... Done.
DGEboth <- getGSA(DGE.ft, up_or_down = "both",
LFCthresh = 0,
pval_thresh = 0.01)
Uploading data to Enrichr... Done.
Querying KEGG_2019_Human... Done.
Querying GWAS_Catalog_2019... Done.
Querying GO_Biological_Process_2023... Done.
Querying GO_Cellular_Component_2023... Done.
Querying GO_Molecular_Function_2023... Done.
Parsing results... Done.
The DGE.ft to compare to with the pseudobulk results
# write the DGE
write.csv(DGE.ft, "/Users/rhalenathomas/Documents/Data/scRNAseq/ParseExample/Experiment1-mini12/DGEseuratNPCs.csv")
Create a sum of counts data table
sum_counts <- AggregateExpression(integrated_seurat, assay = "RNA", group.by = "sample", add.ident = "sample")
# this seems to group by the group and the active ident together
colnames(integrated_seurat@meta.data)
dim(sum_counts$RNA)
sum_counts_df <- as.data.frame(sum_counts$RNA)
dim(sum_counts_df)
write.csv(sum_counts_df, "/Users/rhalenathomas/Documents/Data/scRNAseq/ParseExample/Experiment1-mini12/SumCounts_Integrated_by_sample_celltype.csv")
sum_counts_df[1:4,1:5]
Try to separately variables in the aggregation
colnames(integrated_seurat@meta.data)
sum_counts <- AggregateExpression(integrated_seurat, assay = "RNA", group.by = c("Batch","IPSC_Line","DiseaseStatus"))
# this seems to group by the group and the active ident together
colnames(integrated_seurat@meta.data)
dim(sum_counts$RNA)
sum_counts_df <- as.data.frame(sum_counts$RNA)
dim(sum_counts_df)
# all cell types are integrated
write.csv(sum_counts_df, "/Users/rhalenathomas/Documents/Data/scRNAseq/ParseExample/Experiment1-mini12/SumCounts_Integrated_allCelltypes_12samples.csv")
Try to make the MDS plots and the DESeq2 DGE
library( "DESeq2" )
library(ggplot2)
Visualize groups